# Chunk defaults for knitting the codebook: keep code and diagnostics visible.
knitr::opts_chunk$set(
  echo = TRUE,    # print the R source of every chunk
  message = TRUE, # keep messages emitted during codebook generation
  warning = TRUE, # keep warnings emitted during codebook generation
  error = TRUE    # keep knitting past errors instead of aborting, which is
                  # usually better for debugging
)
# Use the black-and-white ggplot2 theme for every plot in the document.
ggplot2::theme_set(ggplot2::theme_bw())
# Infinite split width, so wide pander tables are not broken into pieces.
pander::panderOptions("table.split.table", Inf)
# load libraries
library(codebook)
library(here)
library(dplyr)
library(tidyverse)
library(future)
library(labelled)

This is a data dictionary for the data set used in the paper “Cognates are advantaged in early bilingual expressive vocabulary development”.

# Load the two public datasets (full list and matched list) from data_keepers/,
# with paths resolved relative to the project root by here::here().
keepers_cognate_full <-
  here::here("data_keepers/public_keepers_cognate_full.csv") %>%
  rio::import()

keepers_cognate_matched <-
  here::here("data_keepers/public_keepers_cognate_matched.csv") %>%
  rio::import()

Variables

keepers_cognate_full: Overview

# Codebook table for the full dataset.
codebook::codebook_items(keepers_cognate_full)

Codebook table

keepers_cognate_matched: Overview

# Codebook table for the matched dataset.
codebook::codebook_items(keepers_cognate_matched)

Codebook table

subject_id & n_months

  • subject_id: Unique participant ID
  • n_months: The month of administration, where 1 means the participant contributed data in the first month after they entered the study. Subsequent numbers refer to the later months in which they contributed data over the course of the study.

The dataset included N = 47 participants.

# Count unique participants and unique administrations; an administration is
# identified by the "<subject_id>_<n_months>" key built with str_c().
keepers_cognate_full %>%
  summarise(
    N_subject_id = n_distinct(subject_id),
    N_administration = n_distinct(str_c(subject_id, "_", n_months))
  )
##   N_subject_id N_administration
## 1           47              219

completed

  • completed_eng: whether the English form was completed that month
  • completed_fr: whether the French form was completed that month
  • completed_both: whether both the English and French forms were completed that month

In the final analysis, we only kept data where both the English and French forms were completed within the same month.

# Collapse to one row per administration (subject_id x n_months), then report
# the length of each completed_* column under the name n_<column>.
# NOTE(review): length() returns the number of rows, not the number of
# completed forms, so the three values are equal by construction. Confirm
# that a row count (rather than a count of completed forms) is intended.
keepers_cognate_full %>%
  mutate(administration = str_c(subject_id, "_", n_months)) %>%
  distinct(administration, .keep_all = TRUE) %>%
  summarise(
    across(
      c(completed_eng, completed_fr, completed_both),
      length,
      .names = "n_{.col}"
    )
  )
##   n_completed_eng n_completed_fr n_completed_both
## 1             219            219              219

age

  • age_days: Age in days, calculated by Date of test - Date of birth (which are both removed from the public data to protect the participants’ information)
  • age_months: Age in months (with decimal), calculated by age_days/(365.25/12)
# Descriptive statistics (mean, SD, min, max) for age in days, computed over
# all rows of the full dataset. Missing ages are dropped before each statistic.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
keepers_cognate_full %>%
  summarize(
    mean_age_days = mean(age_days, na.rm = TRUE),
    sd_age_days = sd(age_days, na.rm = TRUE),
    min_age_days = min(age_days, na.rm = TRUE),
    max_age_days = max(age_days, na.rm = TRUE)
  )
##   mean_age_days sd_age_days min_age_days max_age_days
## 1      632.4475    77.87396          493          826
# Histogram of age in days over all rows, using geom_histogram()'s defaults.
ggplot(keepers_cognate_full, aes(x = age_days)) +
  geom_histogram()

sex

  • sex: categorical with 3 levels: Female, Male & Other
# Tabulate sex using one row per participant, then add each level's share of
# participants as a percentage rounded to two decimals.
keepers_cognate_full %>%
  distinct(subject_id, .keep_all = TRUE) %>%
  count(sex) %>%
  mutate(percentage = round(n / sum(n) * 100, digits = 2))
##      sex  n percentage
## 1 Female 24      51.06
## 2   Male 21      44.68
## 3  Other  2       4.26

total_vocab

  • total_eng_vocab: The total number of words the participant produced on the English CDI form within the complete cognate list (i.e., 537 translation equivalents)
  • total_fr_vocab: The total number of words the participant produced on the French CDI form within the complete cognate list (i.e., 537 translation equivalents)
  • total_vocabulary: The total number of words produced on both the English and French forms within the complete cognate list (i.e., 537 translation equivalents), calculated by total_eng_vocab + total_fr_vocab
# Descriptive statistics for every column whose name matches "total_",
# computed over one row per administration (subject_id x n_months).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
keepers_cognate_full %>%
  mutate(administration = str_c(subject_id, "_", n_months)) %>%
  # keep a single row per administration so totals are not repeated per item
  distinct(administration, .keep_all = TRUE) %>%
  # stack the matched columns: `type` holds the column name, `n_words` its value
  pivot_longer(
    matches("total_"),
    names_to = "type",
    values_to = "n_words"
  ) %>%
  group_by(type) %>%
  summarize(
    mean_n_words = mean(n_words, na.rm = TRUE),
    sd_n_words = sd(n_words, na.rm = TRUE),
    min_n_words = min(n_words, na.rm = TRUE),
    max_n_words = max(n_words, na.rm = TRUE)
  )
## # A tibble: 3 × 5
##   type             mean_n_words sd_n_words min_n_words max_n_words
##   <chr>                   <dbl>      <dbl>       <int>       <int>
## 1 total_eng_vocab          94.1      119.            0         601
## 2 total_fr_vocab           87.3       90.3           0         527
## 3 total_vocabulary        181.       186.            0         845

word_pairs

  • word_pairs: The translation equivalent pair with the English word listed before the hyphen and the French word after
  • cognate_status: Whether the translation equivalent pair is “cognate” or “non-cognate” (2 levels)
  • english_item_id & english_item: The item in the English CDI form and its item number on the CDI form
  • french_item_id & french_item: The item in the French CDI form and its item number on the CDI form
  • eng_acquired: Whether the participant produced the English item, with 2 levels: 1 = produced and 0 = not yet produced
  • fr_acquired: Whether the participant produced the French item, with 2 levels: 1 = produced and 0 = not yet produced
  • Eng_AoA (only in keepers_cognate_matched.csv): Age of acquisition for each English CDI word retrieved from Wordbankr
  • Fr_AoA (only in keepers_cognate_matched.csv): Age of acquisition for each French CDI word retrieved from Wordbankr

Full list

Number of word pairs in the full list

# Count the word pairs listed for a single administration of the full list
# (participant 53279, first month).
# NOTE(review): presumably every administration carries the same item list,
# so this one stands in for all of them; verify.
keepers_cognate_full %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(n = length(word_pairs))
##     n
## 1 537

Number of cognates vs. non-cognates in the full list

# Number of rows per cognate_status for a single administration of the full
# list (participant 53279, first month).
keepers_cognate_full %>%
  filter(subject_id == 53279, n_months == 1) %>%
  group_by(cognate_status) %>%
  summarise(n = n())
## # A tibble: 2 × 2
##   cognate_status     n
##   <chr>          <int>
## 1 cognate          131
## 2 non-cognate      406

Number of English and French items in the full list

# Number of English and French item entries for a single administration of the
# full list (participant 53279, first month).
keepers_cognate_full %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(
    eng_n = length(english_item),
    fr_n = length(french_item)
  )
##   eng_n fr_n
## 1   537  537

Matched list

Number of word pairs in the matched list

# Count the word pairs listed for a single administration of the matched list
# (participant 53279, first month).
# NOTE(review): presumably every administration carries the same item list,
# so this one stands in for all of them; verify.
keepers_cognate_matched %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(n = length(word_pairs))
##     n
## 1 162

Number of cognates vs. non-cognates in the matched list

# Number of rows per cognate_status for a single administration of the matched
# list (participant 53279, first month).
keepers_cognate_matched %>%
  filter(subject_id == 53279, n_months == 1) %>%
  group_by(cognate_status) %>%
  summarise(n = n())
## # A tibble: 2 × 2
##   cognate_status     n
##   <chr>          <int>
## 1 cognate           81
## 2 non-cognate       81

Number of English and French items in the matched list

# Number of English and French item entries for a single administration of the
# matched list (participant 53279, first month).
keepers_cognate_matched %>%
  filter(subject_id == 53279, n_months == 1) %>%
  summarise(
    eng_n = length(english_item),
    fr_n = length(french_item)
  )
##   eng_n fr_n
## 1   162  162